#Jonas Kulakauskas
#HW4
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import sqlite3 as sq3
#1
multilevel_lookup = pd.read_csv('https://raw.githubusercontent.com/bcaffo/MRIcloudT1volumetrics/master/inst/extdata/multilevel_lookup_table.txt', sep = "\t").drop(['Level5'], axis = 1)
multilevel_lookup = multilevel_lookup.rename(columns = {
"modify" : "roi",
"modify.1" : "level4",
"modify.2" : "level3",
"modify.3" : "level2",
"modify.4" : "level1"})
multilevel_lookup = multilevel_lookup[['roi', 'level4', 'level3', 'level2', 'level1']]
id = 127
subjectData = pd.read_csv("https://raw.githubusercontent.com/bcaffo/ds4bme_intro/master/data/kirby21.csv")
subjectData = subjectData.loc[(subjectData.type == 1) & (subjectData.level == 5) & (subjectData.id == id)]
subjectData = subjectData[['roi', 'volume']]
subjectData = pd.merge(subjectData, multilevel_lookup, on = "roi")
subjectData = subjectData.assign(icv = "ICV")
subjectData = subjectData.assign(comp = subjectData.volume / np.sum(subjectData.volume))
summary = subjectData[["icv", "level1", "level2"]].groupby(["icv", "level1", "level2"], as_index = False).mean()
unique_sum = pd.unique(summary.values.ravel())
unique_sum = unique_sum.tolist()
fig = go.Figure(data=[go.Sankey(
node = dict(
pad = 15,
thickness = 15,
line = dict(color = "black", width = 0.5),
label = unique_sum
),
link = dict(
source = [0, 1, 1, 1, 1, 0, 6, 6, 0, 9, 9, 0, 12, 12, 0, 15, 15, 0, 18, 18, 0, 21, 21, 21, 0, 25, 25, 25],
target = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28],
value = [109787, 33975, 30018, 44979, 815, 11817, 5475, 6342, 11560, 5188, 6372, 10271, 4945, 5326, 159411, 79498, 79913, 4975, 2403, 2572, 531153, 276982, 12381, 241790, 534444, 282874, 13077, 247493],
label = unique_sum
))])
fig.update_layout(title_text="HW4 Subject Data Sankey Diagram", font_size=10)
fig.show()
#3
jo = sq3.connect('/home/jupyter-jkulaka1/opioid.db')
annual = pd.read_sql_query("SELECT * from annual", jo)
land = pd.read_sql_query("SELECT * from land", jo)
population = pd.read_sql_query("SELECT * from population", jo)
jo.close
annual.loc[(annual.BUYER_STATE == "AR") & (annual.BUYER_COUNTY == "MONTGOMERY"), "countyfips"] = "05097"
annual = annual[annual.BUYER_COUNTY.str.contains("NA") ==False]
area = land[["Areaname", "STCOU", "LND110210D"]]
area = area.rename(columns={"STCOU": "countyfips"})
county = population.merge(area, how = "left", on = "countyfips")
county
| BUYER_COUNTY | BUYER_STATE | countyfips | STATE | COUNTY | county_name | NAME | variable | year | population | Areaname | LND110210D | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | AUTAUGA | AL | 01001 | 1 | 1 | Autauga | Autauga County, Alabama | B01003_001 | 2006 | 51328 | Autauga, AL | 594.44 |
| 1 | 2 | BALDWIN | AL | 01003 | 1 | 3 | Baldwin | Baldwin County, Alabama | B01003_001 | 2006 | 168121 | Baldwin, AL | 1589.78 |
| 2 | 3 | BARBOUR | AL | 01005 | 1 | 5 | Barbour | Barbour County, Alabama | B01003_001 | 2006 | 27861 | Barbour, AL | 884.88 |
| 3 | 4 | BIBB | AL | 01007 | 1 | 7 | Bibb | Bibb County, Alabama | B01003_001 | 2006 | 22099 | Bibb, AL | 622.58 |
| 4 | 5 | BLOUNT | AL | 01009 | 1 | 9 | Blount | Blount County, Alabama | B01003_001 | 2006 | 55485 | Blount, AL | 644.78 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 28260 | 28261 | WASHAKIE | WY | 56043 | 56 | 43 | Washakie | Washakie County, Wyoming | B01003_001 | 2014 | 8444 | Washakie, WY | 2238.55 |
| 28261 | 28262 | WESTON | WY | 56045 | 56 | 45 | Weston | Weston County, Wyoming | B01003_001 | 2014 | 7135 | Weston, WY | 2398.09 |
| 28262 | 28263 | SKAGWAY | AK | 02230 | 2 | 230 | Skagway | Skagway Municipality, Alaska | B01003_001 | 2014 | 996 | Skagway, AK | 452.33 |
| 28263 | 28264 | HOONAH ANGOON | AK | 02105 | 2 | 105 | Hoonah Angoon | Hoonah-Angoon Census Area, Alaska | B01003_001 | 2014 | 2126 | Hoonah-Angoon, AK | 7524.92 |
| 28264 | 28265 | PETERSBURG | AK | 02195 | 2 | 195 | Petersburg | Petersburg Borough, Alaska | B01003_001 | 2014 | 3212 | Petersburg, AK | 3281.98 |
28265 rows × 13 columns
#4
annual['DOSAGE_UNIT'] = annual['DOSAGE_UNIT'].astype(float)
mean = annual.groupby(['year', 'BUYER_STATE'])['DOSAGE_UNIT']
mean = mean.mean().reset_index(name = "meandose")
fig = px.scatter(mean, x="BUYER_STATE", y="meandose", color="year", title = "Mean Opioid Pills Used per State by Year", labels = {"meandose" : "Mean Opioid Pills Used", "BUYER_STATE" : "State", "year" : "Year"})
fig.show()